airpol <-
read.csv("~/GitHub/STA135/Homework/HW1/Air-Pollution Data G.C.Tao.csv",
header = TRUE)
library(ggplot2)
library(ggExtra)
## Warning: package 'ggExtra' was built under R version 4.1.3
library(GGally)
## Warning: package 'GGally' was built under R version 4.1.3
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
#Pairwise plots
ggpairs(airpol)
#Marginal plots
colnames <- names(airpol)
for (i in colnames[-1]){
print(ggMarginal(ggplot(airpol, aes_string(x = "Wind..x1.", y = i)) + geom_point(color = 'firebrick'), type = 'histogram', fill = 'dodgerblue'))
}
### b.)
#xbar, mean vector
colMeans(airpol[sapply(airpol, is.numeric)])
## Wind..x1. Solar.Radiation..x2. CO.x3.
## 7.500000 73.857143 4.547619
## NO.x4. N02.x5. O3.x6.
## 2.190476 10.047619 9.404762
## HC.x7.
## 3.095238
#Sn, the COV/VAR marix
n = nrow(airpol)
cov(airpol) * (n-1)/n
## Wind..x1. Solar.Radiation..x2. CO.x3. NO.x4.
## Wind..x1. 2.4404762 -2.7142857 -0.3690476 -0.4523810
## Solar.Radiation..x2. -2.7142857 293.3605442 3.8163265 -1.3537415
## CO.x3. -0.3690476 3.8163265 1.4858277 0.6575964
## NO.x4. -0.4523810 -1.3537415 0.6575964 1.1541950
## N02.x5. -0.5714286 6.6020408 2.2596372 1.0623583
## O3.x6. -2.1785714 30.0578231 2.7545351 -0.7913832
## HC.x7. 0.1666667 0.6088435 0.1383220 0.1723356
## N02.x5. O3.x6. HC.x7.
## Wind..x1. -0.5714286 -2.1785714 0.1666667
## Solar.Radiation..x2. 6.6020408 30.0578231 0.6088435
## CO.x3. 2.2596372 2.7545351 0.1383220
## NO.x4. 1.0623583 -0.7913832 0.1723356
## N02.x5. 11.0929705 3.0521542 1.0192744
## O3.x6. 3.0521542 30.2409297 0.5804989
## HC.x7. 1.0192744 0.5804989 0.4671202
# R the correlation matrix
round(cor(airpol),2)
## Wind..x1. Solar.Radiation..x2. CO.x3. NO.x4. N02.x5.
## Wind..x1. 1.00 -0.10 -0.19 -0.27 -0.11
## Solar.Radiation..x2. -0.10 1.00 0.18 -0.07 0.12
## CO.x3. -0.19 0.18 1.00 0.50 0.56
## NO.x4. -0.27 -0.07 0.50 1.00 0.30
## N02.x5. -0.11 0.12 0.56 0.30 1.00
## O3.x6. -0.25 0.32 0.41 -0.13 0.17
## HC.x7. 0.16 0.05 0.17 0.23 0.45
## O3.x6. HC.x7.
## Wind..x1. -0.25 0.16
## Solar.Radiation..x2. 0.32 0.05
## CO.x3. 0.41 0.17
## NO.x4. -0.13 0.23
## N02.x5. 0.17 0.45
## O3.x6. 1.00 0.15
## HC.x7. 0.15 1.00
We can see that none of the variables have a very high correlation. We can see the highest correlation in our data is between Carbon Monoxide (CO) and Nitrogen Dioxide (NO2) of 0.557. We also can see that Wind is negatively correlated with all pollutants.
#a.)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
x1 <- c(-6, -3, -2, 1, 2, 5, 6, 8)
x2 <- c(-2, -3, 1, -1, 2, 1, 5, 3)
mydata <- data.frame(x1, x2)
#ggplot(aes(x = x1, y = x2)) + geom_point()
fig <- plot_ly(mydata, x = ~x1, y = ~x2)
fig
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No scatter mode specifed:
## Setting the mode to markers
## Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode
n = nrow(mydata)
myvarmat = cov(mydata) * (n-1)/n
myvarmat
## x1 x2
## x1 20.48438 9.09375
## x2 9.09375 6.18750
s11 = myvarmat[1]
s12 = myvarmat[2]
s22 = myvarmat[4]
#b.)
#Formula pdf page 56
# x1~ = x1 costheta + x2 sintheta
x1tilde = x1 * 0.899 + x2 * 0.438
# x2~ = -x1 sintheta + x2 costheta
x2tilde = -x1 * 0.438 + x2 * 0.899
#c
datatilde = data.frame(x1tilde, x2tilde)
n = nrow(datatilde)
diag(var(datatilde)) #* (n-1)/n)
## x1tilde x2tilde
## 28.461798 2.021716
#d
newx1 = 4 * 0.899 + -2 * 0.438
newx2 = -4 * 0.438 + -2 * 0.899
dOP = sqrt((newx1^2/28.462) + (newx2^2/2.022))
dOP
## [1] 2.548064
#e
#From footnote
a11 = (0.899^2/(0.899^2 * s11 + 2 * 0.438 * 0.899 * s12 + 0.438^2 * s22)) + (0.438^2/ (0.899^2 * s22 - 2 * 0.438 * 0.899 * s12 + 0.438^2 * s11))
a22 = (0.438^2 / (0.899^2 * s11 + 2 * 0.438 * 0.899 *s12 + 0.438^2 * s22)) + (0.899^2 / (0.899^2 * s22 - 2 * 0.438 * 0.899*s12 + 0.438^2 * s11))
a12 = ((0.438 * 0.899) / (0.899^2 * s11 + 2 * 0.438 * 0.899 * s12 + 0.438^2 * s22)) - ((0.438 * 0.899)/ (0.899^2 * s22 - 2 * 0.438 * 0.899 * s12 + 0.438^2 * s11))
dOP2 = sqrt((a11 * 16) + (2*a12 * 4 * -2) + (a22 * 4))
dOP2
## [1] 2.724179
We can see that within rounding error, these two distances are about the same.
WomenTrack <- read.csv("~/Github/STA135/Homework/HW1/National Track Records for Women.csv", header = TRUE)
#WomenTrack
#First, convert last 4 columns into seconds
WomenTrack[,5:8] = WomenTrack[, 5:8] * 60
#WomenTrack
#Second, divide each column by respective meters to get meters per second
meters = c(1, 100, 200, 400, 800, 1500, 3000, 42195)
for (i in 2:length(meters)){
WomenTrack[,i] <- meters[i]/ WomenTrack[,i]
}
WomenTrack
## Country X100m.s. X200m.s. X400m.s. X800m.min. X1500m.min.
## 1 Argentina 8.643042 8.718396 7.619048 6.504065 5.882353
## 2 Australia 8.992806 8.996851 8.225375 6.734007 6.218905
## 3 Austria 8.968610 8.810573 7.902015 6.872852 6.172840
## 4 Belgium 8.976661 8.896797 7.774538 6.768190 6.127451
## 5 Bermuda 8.726003 8.676790 7.504690 6.441224 5.827506
## 6 Brazil 8.952551 8.849558 7.902015 6.768190 5.995204
## 7 Canada 9.107468 8.841733 8.014426 6.768190 6.250000
## 8 Chile 8.583691 8.389262 7.451565 6.666667 5.924171
## 9 China 9.267841 9.086779 8.030516 6.908463 6.510417
## 10 Columbia 8.841733 8.726003 8.058018 6.535948 5.760369
## 11 Cook Islands 7.987220 7.719027 6.488240 5.847953 5.186722
## 12 Costa Rica 8.532423 8.361204 7.608902 6.349206 5.530973
## 13 Czech Republic 9.017133 9.103323 8.335070 7.054674 6.203474
## 14 Denmark 8.756567 8.561644 7.558579 6.600660 6.067961
## 15 Domincan Republic 8.598452 8.364701 7.544323 6.379585 5.506608
## 16 Finland 8.984726 8.932559 7.977663 6.633499 6.097561
## 17 France 9.319664 9.095043 8.290155 6.872852 6.203474
## 18 Germany 9.250694 9.212345 8.403361 6.944444 6.313131
## 19 Great Britian 9.009009 9.049774 8.092252 6.872852 6.297229
## 20 Greece 9.233610 8.822232 7.911392 6.666667 6.112469
## 21 Guatemala 8.389262 8.163265 7.189073 6.201550 5.580357
## 22 Hungary 8.764242 8.673027 7.766990 6.700168 6.218905
## 23 India 8.650519 8.382230 7.262164 6.349206 5.733945
## 24 Indonesia 8.787346 8.764242 7.835455 6.666667 6.097561
## 25 Ireland 8.748906 8.688097 7.832387 6.633499 6.281407
## 26 Israel 8.733624 8.639309 7.683442 6.441224 5.896226
## 27 Italy 8.976661 8.849558 7.795751 6.802721 6.281407
## 28 Japan 8.802817 8.572653 7.702677 6.633499 6.009615
## 29 Kenya 8.605852 8.557980 7.757952 6.768190 6.313131
## 30 Korea, South 8.703220 8.403361 7.452953 6.379585 5.896226
## 31 Korea, North 8.474576 7.968127 7.113640 6.768190 5.882353
## 32 Luxembourg 8.503401 8.347245 7.133940 6.441224 5.747126
## 33 Malaysia 8.695652 8.557980 7.610350 6.289308 5.694761
## 34 Mauritius 8.532423 8.392782 7.323325 6.472492 5.773672
## 35 Mexico 9.017133 8.646779 8.181632 6.600660 5.966587
## 36 Myanmar(Burma) 8.576329 8.442381 7.552870 6.568144 5.952381
## 37 Netherlands 9.025271 8.768084 7.789679 6.908463 6.157635
## 38 New Zealand 8.833922 8.646779 7.751938 6.768190 6.097561
## 39 Norway 8.764242 8.580009 7.626311 6.568144 6.234414
## 40 Papua New Guinea 8.361204 8.103728 7.249003 5.952381 5.411255
## 41 Philippines 8.865248 8.565310 7.305936 6.289308 5.668934
## 42 Poland 9.149131 9.037506 8.116883 6.837607 6.265664
## 43 Portugal 8.849558 8.741259 7.704160 6.734007 6.313131
## 44 Romania 8.849558 8.948546 8.019246 6.944444 6.410256
## 45 Russia 9.285051 9.144947 8.144981 6.980803 6.459948
## 46 Samoa 8.077544 7.858546 7.102273 5.822416 4.612546
## 47 Singapore 8.244023 8.149959 7.262164 6.289308 5.530973
## 48 Spain 9.041591 8.936550 8.053151 6.802721 6.234414
## 49 Sweden 8.960573 8.764242 7.738441 6.700168 6.112469
## 50 Switzerland 8.818342 8.741259 7.794232 6.734007 6.297229
## 51 Taiwan 8.912656 8.865248 7.584376 6.410256 5.707763
## 52 Thailand 8.826125 8.583691 7.604563 6.472492 5.707763
## 53 Turkey 8.888889 8.806693 7.525870 6.633499 6.377551
## 54 U.S.A. 9.532888 9.372071 8.191685 6.872852 6.329114
## X3000m.min. Marathon
## 1 5.440696 4.678353
## 2 5.793743 4.900355
## 3 5.694761 4.556203
## 4 5.668934 4.916113
## 5 5.096840 4.037490
## 6 5.530973 4.770708
## 7 5.854801 4.740159
## 8 5.399568 4.619654
## 9 6.172840 5.045197
## 10 5.336179 4.531542
## 11 4.504505 3.312061
## 12 5.081301 4.279499
## 13 5.636979 4.843653
## 14 5.740528 4.709053
## 15 5.055612 4.224739
## 16 5.753740 4.751689
## 17 5.787037 4.743036
## 18 5.875441 4.971721
## 19 5.973716 5.199630
## 20 5.580357 4.584420
## 21 5.149331 4.104652
## 22 5.847953 4.735690
## 23 5.263158 4.557975
## 24 5.488474 4.448134
## 25 5.980861 4.944456
## 26 5.359057 4.497634
## 27 5.820722 4.901722
## 28 5.720824 5.044473
## 29 5.959476 5.078717
## 30 5.549390 4.812825
## 31 5.580357 4.839653
## 32 5.428882 4.712524
## 33 5.370569 4.154360
## 34 5.411255 4.208810
## 35 5.624297 4.881647
## 36 5.506608 4.439149
## 37 5.834306 4.903089
## 38 5.707763 4.801652
## 39 5.861665 4.985467
## 40 4.897160 3.180112
## 41 5.096840 4.249758
## 42 5.861665 4.877584
## 43 5.882353 4.907879
## 44 5.980861 4.935088
## 45 5.966587 4.976647
## 46 3.810976 3.670790
## 47 5.030181 4.554433
## 48 5.896226 4.800014
## 49 5.675369 4.676175
## 50 5.813953 4.833001
## 51 5.192108 4.408262
## 52 4.965243 4.330624
## 53 5.861665 4.644060
## 54 5.931198 4.981935
#mean vector
colMeans(WomenTrack[sapply(WomenTrack, is.numeric)])
## X100m.s. X200m.s. X400m.s. X800m.min. X1500m.min. X3000m.min.
## 8.814772 8.664408 7.712067 6.604214 5.989687 5.542701
## Marathon
## 4.620264
#Sigma
n = nrow(WomenTrack)
cov(WomenTrack[,-1]) * (n-1/n)
## X100m.s. X200m.s. X400m.s. X800m.min. X1500m.min. X3000m.min.
## X100m.s. 4.887390 5.160972 5.218521 3.512252 4.438347 4.973973
## X200m.s. 5.160972 6.190134 6.146866 4.044555 5.183245 5.691616
## X400m.s. 5.218521 6.146866 7.438047 4.369310 5.152154 5.847082
## X800m.min. 3.512252 4.044555 4.369310 3.968872 4.666927 5.384904
## X1500m.min. 4.438347 5.183245 5.152154 4.666927 6.685094 7.757939
## X3000m.min. 4.973973 5.691616 5.847082 5.384904 7.757939 9.532284
## Marathon 4.377891 5.037030 5.499673 5.090757 6.394526 7.911549
## Marathon
## X100m.s. 4.377891
## X200m.s. 5.037030
## X400m.s. 5.499673
## X800m.min. 5.090757
## X1500m.min. 6.394526
## X3000m.min. 7.911549
## Marathon 8.999474
#R
cor(WomenTrack[,-1])
## X100m.s. X200m.s. X400m.s. X800m.min. X1500m.min. X3000m.min.
## X100m.s. 1.0000000 0.9383028 0.8655248 0.7974687 0.7764777 0.7287297
## X200m.s. 0.9383028 1.0000000 0.9058875 0.8159945 0.8057456 0.7409469
## X400m.s. 0.8655248 0.9058875 1.0000000 0.8041737 0.7306437 0.6944025
## X800m.min. 0.7974687 0.8159945 0.8041737 1.0000000 0.9060324 0.8754795
## X1500m.min. 0.7764777 0.8057456 0.7306437 0.9060324 1.0000000 0.9718385
## X3000m.min. 0.7287297 0.7409469 0.6944025 0.8754795 0.9718385 1.0000000
## Marathon 0.6601124 0.6748635 0.6722005 0.8518052 0.8244153 0.8541900
## Marathon
## X100m.s. 0.6601124
## X200m.s. 0.6748635
## X400m.s. 0.6722005
## X800m.min. 0.8518052
## X1500m.min. 0.8244153
## X3000m.min. 0.8541900
## Marathon 1.0000000
We can see that as a runner travels more distance, the correlation between their 100m other distances decreases. However, the correlation with their marathon rate increases.
#Problem 2.2
A = matrix(c(-1, 3, 4, 2),
nrow = 2,
ncol = 2,
byrow = TRUE)
B = matrix(c(4, -3, 1, -2,-2, 0),
nrow = 3,
ncol = 2,
byrow = TRUE)
C = matrix(c(5,-4, 2), nrow = 3, ncol = 1)
#a.)
5 * A
## [,1] [,2]
## [1,] -5 15
## [2,] 20 10
#b.)
B %*% A
## [,1] [,2]
## [1,] -16 6
## [2,] -9 -1
## [3,] 2 -6
#c.)
t(A) %*% t(B)
## [,1] [,2] [,3]
## [1,] -16 -9 2
## [2,] 6 -1 -6
#d.)
t(C) %*% B
## [,1] [,2]
## [1,] 12 -7
#e.) #Produces an error, non-conformable arguments
# Dimensions for these matricies are not aligned
# A %*% B
A = matrix(c(2, 1, 1, 3),
nrow = 2,
ncol = 2,
byrow = TRUE)
B = matrix(c(1, 4, 2, 5, 0, 3),
nrow = 2,
ncol = 3,
byrow = TRUE)
C = matrix(c(1, 4, 3, 2),
nrow = 2,
ncol = 2,
byrow = TRUE)
#a.)
all.equal(A, t(A))
## [1] TRUE
#b.)
all.equal(solve(t(C)), t(solve(C)))
## [1] TRUE
#c.)
all.equal(t(A %*% B), t(B) %*% t(A))
## [1] TRUE
e.) Let’s first show that:
\[ (AB)^T_{ij} = (AB)_{ji} = \sum^n_{k=1}A_{jk}B_{ki} \]
Next we show that: \[ (B^TA^T)_{ij}= \sum\limits_{k=1}^nB^T_{ik}A^T_{kj}= \sum\limits_{k=1}^nB_{ki}A_{jk}= \sum\limits_{k=1}^nA_{jk}B_{ki} \]
Since we can see that both sides of the equation \((AB)_{ji} = (AB)^T_{ij}\) equal the same summation. We can conclude that in the general case the transpose of the product of \(A\) and \(B\) is equal to the product of \(A^T\) and \(B^T\).
a.) \[\begin{aligned} (A^{-1})^T &= (A^{-1})^TA^T(A^T)^{-1} \\ &=(AA^{-1})^T(A^T)^{-1} \\ &=I^T(A^T)^{-1} \\ &= (A^T)^{-1} \\ \end{aligned} \]
b.) \[\begin{aligned} (AB)^{-1} &= B^{-1}A^{-1} \\ &= B^{-1}A^{-1}(AB)(AB)^{-1}\\ &= B^{-1}(A^{-1}A)B(AB)^{-1} \\ &= I(AB)^{-1}\\ &= (AB)^{-1} \end{aligned} \] ## Problem 5 \[ A = \begin{pmatrix} 1 & 2 \\ 2 & -2 \\ \end{pmatrix}\\ \] To find eigenvaules we solve: \[\begin{aligned} |A - \lambda * I| &= 0\\ det(\begin{pmatrix} 1 & 2 \\ 2 & -2 \\ \end{pmatrix} - \begin{pmatrix} \lambda & 0 \\ 0 & \lambda \\ \end{pmatrix} &= \\ det(\begin{pmatrix} 1 - \lambda & 2 \\ 2 & -2 - \lambda \\ \end{pmatrix}) &= \\ -2 - \lambda + 2\lambda + \lambda^2 - 4 &= \\ \lambda^2 + \lambda - 6 &= \\ (\lambda + 3)(\lambda - 2) &= \\ \lambda_1 = -3, \lambda_2 = 2 \end{aligned} \] Now we find the eigenvectors. Here is the first eigenvalue, -3.
\[\begin{aligned} Av &= \lambda_1v\\ (A - \lambda_1)v &= 0 \\ (A + 3)v &= 0 \\ \begin{pmatrix} 4 & 2 \\ 2 & 1 \\ \end{pmatrix} v &= 0\\ \begin{pmatrix} 4 & 2 \\ 2 & 1 \\ \end{pmatrix} \begin{pmatrix} v_1 \\ v_2 \\ \end{pmatrix} &= 0\\ 4v_1 + 2v_2 &= 0\\ 2v_1 + v_2 &= 0 \\ v_1 = k \begin{pmatrix} 1/2 \\ 1 \\ \end{pmatrix} \end{aligned}\] Now the second eigenvector with the eigenvalue 2. \[\begin{aligned} Av &= \lambda_1v\\ (A - \lambda_2)v &= 0 \\ (A- 2)v &= 0 \\ \begin{pmatrix} -1 & 2 \\ 2 & -4 \\ \end{pmatrix} v &= 0\\ \begin{pmatrix} -1 & 2 \\ 2 & -4 \\ \end{pmatrix} \begin{pmatrix} v_1 \\ v_2 \\ \end{pmatrix} &= 0\\ -1v_1 + 2v_2 &= 0\\ 2v_1 + -4v_2 &= 0 \\ v_2 = k \begin{pmatrix} 2 \\ 1 \\ \end{pmatrix} \end{aligned}\] First we normalize the vectors \[\begin{aligned} v_1 &= \begin{pmatrix} 1/2 \\ 1 \\ \end{pmatrix}\\ ||v_1|| &= \sqrt{5}/4 \\ \hat{v_1} &= \begin{pmatrix} 8/\sqrt{5}\\ 4/\sqrt{5}\\ \end{pmatrix}\\ \\ v_2 = \begin{pmatrix} 2 \\ 1 \\ \end{pmatrix}\\ ||v_2|| &= \sqrt{5}\\ \hat{v_2} &= \begin{pmatrix} 2/\sqrt{5}\\ 1/\sqrt{5}\\ \end{pmatrix} \end{aligned}\]The spectral decomposition thus is
\[\begin{aligned} 2 \begin{pmatrix} 8/\sqrt{5}\\ 4/\sqrt{5}\\ \end{pmatrix} \begin{pmatrix} 8/\sqrt{5} & 4/\sqrt{5}\\ \end{pmatrix} - 3 \begin{pmatrix} 8/\sqrt{5}\\ 4/\sqrt{5}\\ \end{pmatrix} \begin{pmatrix} 2/\sqrt{5} & 1/\sqrt{5}\\ \end{pmatrix} \end{aligned}\]P 2.11
See by hand
P 2.12
P 2.30
P 2. 34
b <- matrix(c(2, -1, 4,0), nrow= 4, ncol = 1)
d <- matrix(c(-1,3,-2,1), nrow= 4, ncol = 1)
LHS <- as.numeric((t(b) %*% d)^2)
RHS <- as.numeric((t(b) %*% b) %*% (t(d) %*% d))
LHS > RHS
## [1] FALSE